Load required libraries

library("hdxstats")
library("dplyr")
library("ggplot2")
library("RColorBrewer")
library("tidyr")
library("pheatmap")
library("scales")
library("viridis")
library("patchwork")
library("Biostrings")
library("xfun")
library("tidyverse")
source("R/test_script_app2.R")

Parsing Raw Data

Parse input raw data and output QFeatures object instance given a CSV file path for different test cases.

# First test case
csv_filepath <- "/homes/sanjuan/R/x86_64-pc-linux-gnu-library/4.1/hdxstats/extdata/MBP.csv"

# CASE 1: Parse data, given input data file path + parameter file
hdx_data <- extract_hdx_data(csv_filepath, parameter_file = "vignettes/data/myparameters.hdxp")

# CASE 2: Parse data, given input data file path + list of parameters
data <- read_csv(csv_filepath)
myparameters <- make_parameter_file(data)
hdx_data <- extract_hdx_data(csv_filepath, parameters = myparameters)

# CASE 3: Parse data, given input data file path. Work out parameters for pre-processing using interactive mode
hdx_data <- extract_hdx_data(csv_filepath, parameter_file = "vignettes/data/myparameters.hdxp", interactive = TRUE)

Data Analysis of Deuterium Uptake Kinetics

TEST 1:

Perform Data Analysis: Plain fitting

  • Deuterium-update fitting for all peptides
  • Plain fitting - not comparing against a reference state or condition
# INPUT
data_selection <- hdx_data[,1:24]
all_peptides <- rownames(data_selection)[[1]]
starting_parameters <- list(a = NULL, b = 0.001,  d = NULL, p = 1)
# OUTPUT
results <- analyse_kinetics(data = data_selection, 
                            method = "fit", 
                            peptide_selection = all_peptides, 
                            start = starting_parameters)
[1] "INFO: Performing fitting of Deuterium uptake kinetics. Method: 'hdxstats::fitUptakeKinetics' "
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates
[1] "Could not fit model, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates
[1] "Could not fit model, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
Error in nlsModel(formula, mf, start, wts) : 
  singular gradient matrix at initial parameter estimates
[1] "model fit failed, likely exessive missing values"
results$method
[1] "fitUptakeKinetics"

Visualise Output from Data Analysis

View fitting curves of Deu-uptake kinetics for all available conditions associated to selected peptides

graphics_kinetic <- visualise_hdx_data(results, type="kinetics") 
[1] "INFO: I found  104  models in your results data."
graphics_kinetic[[1]] | graphics_kinetic[[2]] | graphics_kinetic[[3]]

View forest plots showing the difference between uptake measurements between conditions, plus the dispersion between their respective fitting model parameters

graphics <- visualise_hdx_data(results, type="forest") 
[1] "INFO: I found  104  models in your results data."

Combine graphical outputs in a single canvas

graphics[[1]] | graphics_kinetic[[1]]

Display numerical values as a table

graphics[[1]]$data

Perform Data Analysis: Differential fitting

  • Deuterium-update fitting for data selection
  • Differential fitting - with respect to a single reference peptide
# INPUT 
data_selection <- hdx_data[,1:100]
all_peptides <- rownames(data_selection)[[1]] # get all peptides
starting_parameters <- list(a = NULL, b = 0.0001,  d = NULL, p = 1)

# OUTPUT
results <- analyse_kinetics(data = data_selection, 
                            method = "dfit", 
                            peptide_selection = all_peptides[37], 
                            start = starting_parameters)
[1] "INFO: Performing differential fitting of Deuterium uptake kinetics. Method: 'hdxstats::differentialUptakeKinetics' "
[1] "INFO: You did not specify a 'formula' for your fitting model."
[1] "INFO: Fitting will be performed for (default): 'formula <- value ~ a * (1 - exp(-b*(timepoint)^p)) + d' "
graphics <- visualise_hdx_data(results, type="kinetics")
[1] "INFO: I found  7  models in your results data."

View graphical output for reference peptide shown in LHS Top corner.

graphics

TEST CASE 2

Single-domain antibody (sdAb) binding assays to HOIP.

csv_filepath <- "/homes/sanjuan/R/x86_64-pc-linux-gnu-library/4.1/hdxstats/extdata/N64184_1a2_state.csv" 
data <- read_csv(csv_filepath)
Rows: 3600 Columns: 16── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): Protein, Sequence, State
dbl (11): Start, End, MaxUptake, MHP, Exposure, Center, Center SD, Uptake, Uptake SD, RT, RT SD
lgl  (2): Modification, Fragment
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
make_parameter_file(data, save = "vignettes/data/N64184_1a2_state.hdxp")
Error in make_parameter_file(data, save = "vignettes/data/N64184_1a2_state.hdxp") : 
  could not find function "make_parameter_file"
hdx_data <- extract_hdx_data(csv_filepath, parameter_file = "vignettes/data/N64184_1a2_state.hdxp")
[1] "INFO: You gave me a CSV file of your HDX-MSM data"
[1] "INFO: I will pre-process your data parse it using QFeatures ..."
[1] "INFO: You provided a 'parameter_file', I will extract parameters from this to format your output QFeatures data object."
[1] "INFO: You provided a list of 'parameters', I will extract parameters from this to format your output QFeatures data object."
[1] "INFO: Stripped your 'Exposure_Time' values from non-numeric characters."
[1] "INFO: Your original_time_units == 'm'. I will convert your 'Exposure_Time' values to seconds (s)."
[1] "INFO: Your 'Replicate' column appears to be NA. I will add this column with 1 values just to label your data."
[1] "INFO: Your 'Charge' column appears to be NA. I will add this column with 0 values just to label your data."
[1] "INFO: Reformatting your data to a wide format..."
[1] "INFO: Parsing your data as a qDF object class instance. Method: parseDeutData"
[1] "WARNING: Your output data is not normalised."
[1] "WARNING: Your output data was not saved. You can provide an output path with 'save = my_path'"
[1] "INFO: I pre-processed you input CSV data content and now it's available as a QFeatures instance"

# INPUT 
data_selection <- hdx_data[,1:33]
all_peptides <- rownames(data_selection)[[1]] # get all peptides
starting_parameters <- list(a = NULL, b = 0.01,  d = NULL)
formula = value ~ a * (1 - exp(-b*(timepoint))) + d

# OUTPUT
results <- analyse_kinetics(data = data_selection, 
                            method = "dfit", 
                            peptide_selection = all_peptides[3], 
                            start = starting_parameters,
                            formula = formula)
[1] "INFO: Performing differential fitting of Deuterium uptake kinetics. Method: 'hdxstats::differentialUptakeKinetics' "
[1] "INFO: You specified your own 'formula' for your fitting model."
graphics <- visualise_hdx_data(results, type="kinetics")
[1] "INFO: I found  11  models in your results data."
custom_colors <- scale_color_manual(values = colorRampPalette(brewer.pal(8, name = "Set2"))(11))
graphics + custom_colors
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.

TEST CASE 3

Original raw data

#csv_filepath <- "inst/extdata/Project_2_SecA_Cluster_Data.csv"
#data <- read_csv(csv_filepath)
#data$Replicate <- unlist(lapply(strsplit(data$File, split="_"), function(x) tail(x, n=1)))
#write_csv(data, file = "inst/extdata/Project_2_SecA_Cluster_Data_edited.csv")

csv_filepath <- "inst/extdata/Project_2_SecA_Cluster_Data_edited.csv"

Had to pre-process its content to add Replicate column

data <- read_csv(csv_filepath)
Rows: 31855 Columns: 16── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr  (4): Protein, Sequence, State, File
dbl (10): Start, End, MaxUptake, MHP, Exposure, z, RT, Inten, Center, Replicate
lgl  (2): Modification, Fragment
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
hdx_data <- extract_hdx_data(csv_filepath, parameter_file = "vignettes/data/Project_2_SecA_Cluster_Data.hdxp")
[1] "INFO: You gave me a CSV file of your HDX-MSM data"
[1] "INFO: I will pre-process your data parse it using QFeatures ..."
[1] "INFO: You provided a 'parameter_file', I will extract parameters from this to format your output QFeatures data object."
[1] "INFO: You provided a list of 'parameters', I will extract parameters from this to format your output QFeatures data object."
[1] "INFO: Stripped your 'Exposure_Time' values from non-numeric characters."
[1] "INFO: Your original_time_units == 'm'. I will convert your 'Exposure_Time' values to seconds (s)."
[1] "INFO: Reformatting your data to a wide format..."
[1] "INFO: Parsing your data as a qDF object class instance. Method: parseDeutData"
Making assay rownames unique.
[1] "WARNING: Your output data is not normalised."
[1] "WARNING: Your output data was not saved. You can provide an output path with 'save = my_path'"
[1] "INFO: I pre-processed you input CSV data content and now it's available as a QFeatures instance"
pheatmap(t(assay(hdx_data)),
         cluster_rows = FALSE, 
         cluster_cols = FALSE,
         color = brewer.pal(n = 9, name = "BuPu"),
         main = "secA heatmap", 
         fontsize = 14,
         legend_breaks = c(0, 1, 2, 3, 4, 5, 6, max(assay(hdx_data))),
         legend_labels = c("0", "1", "2", "3", "4", "5", "6", "Incorporation"))

graphics <- visualise_hdx_data(results, type="kinetics") # READY
graphics <- visualise_hdx_data(results, type="forest") # READY

# NOTE: This only works to compare ONLY TWO distinct conditions, for the same timepoint, for all peptide fragments
# This needs to define a BASELINE state/condition and an ALTERNATE sate/condition
# NOTE: Data must be normalised by intercept 
graphics <- visualise_hdx_data(results, type="manhatten", reference = NULL)# <<<<--- NEXT

# NOTE: NOT SURE ??
graphics <- visualise_hdx_data(results, type="epitope", level="peptide", fasta = "my_fasta_path") # Return an EpitopeMap
graphics <- visualise_hdx_data(results, type="epitope", level="residue", fasta = "my_fasta_path") # Return heatmap
graphics <- visualise_hdx_data(results, type="epitope", level="residue", pdb="my_pdb_path") # Return heatmap projected onto PDB

graphics <- visualise_hdx_data(results, type="protection", level="peptide") # Return heatmap for peptide residue-number ranges
graphics <- visualise_hdx_data(results, type="protection", level="residue", fasta = "my_fasta_path") # Return heatmap
graphics <- visualise_hdx_data(results, type="protection", level="residue", pdb="my_pdb_path") # Return heatmap projected onto PDB

# GUI: Make GUI by assembling these building blocks
---
title: "R Notebook"
output: html_notebook
---

# Load required libraries

```{r}
library("hdxstats")
library("dplyr")
library("ggplot2")
library("RColorBrewer")
library("tidyr")
library("pheatmap")
library("scales")
library("viridis")
library("patchwork")
library("Biostrings")
library("xfun")
library("tidyverse")
```

```{r}
source("R/test_script_app2.R")
```


# Parsing Raw Data

Parse input raw data and output `QFeatures` object instance given a CSV file path for different test cases.

```{r}
# First test case
csv_filepath <- "/homes/sanjuan/R/x86_64-pc-linux-gnu-library/4.1/hdxstats/extdata/MBP.csv"

# CASE 1: Parse data, given input data file path + parameter file
hdx_data <- extract_hdx_data(csv_filepath, parameter_file = "vignettes/data/myparameters.hdxp")

# CASE 2: Parse data, given input data file path + list of parameters
data <- read_csv(csv_filepath)
myparameters <- make_parameter_file(data)
hdx_data <- extract_hdx_data(csv_filepath, parameters = myparameters)

# CASE 3: Parse data, given input data file path. Work out parameters for pre-processing using interactive mode
hdx_data <- extract_hdx_data(csv_filepath, parameter_file = "vignettes/data/myparameters.hdxp", interactive = TRUE)
```

# Data Analysis of Deuterium Uptake Kinetics

## TEST 1:

### Perform Data Analysis: Plain fitting

* Deuterium-update fitting for all peptides
* Plain fitting - not comparing against a reference state or condition

```{r}
# INPUT
data_selection <- hdx_data[,1:24]
all_peptides <- rownames(data_selection)[[1]]
starting_parameters <- list(a = NULL, b = 0.001,  d = NULL, p = 1)
# OUTPUT
results <- analyse_kinetics(data = data_selection, 
                            method = "fit", 
                            peptide_selection = all_peptides, 
                            start = starting_parameters)
```

```{r}
results$method
```

### Visualise Output from Data Analysis

View fitting curves of Deu-uptake kinetics for all available conditions associated to selected peptides

```{r, fig.width= 22, fig.height = 7}
graphics_kinetic <- visualise_hdx_data(results, type="kinetics") 
graphics_kinetic[[1]] | graphics_kinetic[[2]] | graphics_kinetic[[3]]
```

View forest plots showing the difference between uptake measurements between conditions, plus the dispersion between their respective fitting model parameters

```{r, fig.width= 22, fig.height = 10}
graphics <- visualise_hdx_data(results, type="forest")
```
Combine graphical outputs in a single canvas

```{r, fig.width= 22, fig.height = 10}
graphics[[1]] | graphics_kinetic[[1]]
```
Display numerical values as a table 

```{r}
graphics[[1]]$data
```

### Perform Data Analysis: Differential fitting

* Deuterium-update fitting for data selection 
* Differential fitting - with respect to a *single reference* peptide

```{r}
# INPUT 
data_selection <- hdx_data[,1:100]
all_peptides <- rownames(data_selection)[[1]] # get all peptides
starting_parameters <- list(a = NULL, b = 0.0001,  d = NULL, p = 1)

# OUTPUT
results <- analyse_kinetics(data = data_selection, 
                            method = "dfit", 
                            peptide_selection = all_peptides[37], 
                            start = starting_parameters)

```

```{r}
graphics <- visualise_hdx_data(results, type="kinetics")
```

View graphical output for reference peptide shown in LHS Top corner. 

```{r}
graphics
```

## TEST CASE 2

Single-domain antibody (sdAb) binding assays to HOIP.


```{r}
csv_filepath <- "/homes/sanjuan/R/x86_64-pc-linux-gnu-library/4.1/hdxstats/extdata/N64184_1a2_state.csv" 
```


```{r}
data <- read_csv(csv_filepath)
make_parameter_file(data, save = "vignettes/data/N64184_1a2_state.hdxp")
```


```{r}
hdx_data <- extract_hdx_data(csv_filepath, parameter_file = "vignettes/data/N64184_1a2_state.hdxp")
```


```{r}

# INPUT 
data_selection <- hdx_data[,1:33]
all_peptides <- rownames(data_selection)[[1]] # get all peptides
starting_parameters <- list(a = NULL, b = 0.01,  d = NULL)
formula = value ~ a * (1 - exp(-b*(timepoint))) + d

# OUTPUT
results <- analyse_kinetics(data = data_selection, 
                            method = "dfit", 
                            peptide_selection = all_peptides[3], 
                            start = starting_parameters,
                            formula = formula)
```


```{r}
graphics <- visualise_hdx_data(results, type="kinetics")

custom_colors <- scale_color_manual(values = colorRampPalette(brewer.pal(8, name = "Set2"))(11))
graphics + custom_colors
```


# TEST CASE 3

Original raw data
```{r}
#csv_filepath <- "inst/extdata/Project_2_SecA_Cluster_Data.csv"
#data <- read_csv(csv_filepath)
#data$Replicate <- unlist(lapply(strsplit(data$File, split="_"), function(x) tail(x, n=1)))
#write_csv(data, file = "inst/extdata/Project_2_SecA_Cluster_Data_edited.csv")

csv_filepath <- "inst/extdata/Project_2_SecA_Cluster_Data_edited.csv"
```

Had to pre-process its content to add Replicate column

```{r}
data <- read_csv(csv_filepath)
make_parameter_file(data, save = "vignettes/data/Project_2_SecA_Cluster_Data.hdxp")
```

```{r}
hdx_data <- extract_hdx_data(csv_filepath, parameter_file = "vignettes/data/Project_2_SecA_Cluster_Data.hdxp")
```


```{r, fig.height = 20, fig.width = 80, fig.align = "center"}
pheatmap(t(assay(hdx_data)),
         cluster_rows = FALSE, 
         cluster_cols = FALSE,
         color = brewer.pal(n = 9, name = "BuPu"),
         main = "secA heatmap", 
         fontsize = 14,
         legend_breaks = c(0, 1, 2, 3, 4, 5, 6, max(assay(hdx_data))),
         legend_labels = c("0", "1", "2", "3", "4", "5", "6", "Incorporation"))
```


```{r}
graphics <- visualise_hdx_data(results, type="kinetics") # READY
graphics <- visualise_hdx_data(results, type="forest") # READY

# NOTE: This only works to compare ONLY TWO distinct conditions, for the same timepoint, for all peptide fragments
# This needs to define a BASELINE state/condition and an ALTERNATE sate/condition
# NOTE: Data must be normalised by intercept 
graphics <- visualise_hdx_data(results, type="manhatten", reference = NULL)# <<<<--- NEXT

# NOTE: NOT SURE ??
graphics <- visualise_hdx_data(results, type="epitope", level="peptide", fasta = "my_fasta_path") # Return an EpitopeMap
graphics <- visualise_hdx_data(results, type="epitope", level="residue", fasta = "my_fasta_path") # Return heatmap
graphics <- visualise_hdx_data(results, type="epitope", level="residue", pdb="my_pdb_path") # Return heatmap projected onto PDB

graphics <- visualise_hdx_data(results, type="protection", level="peptide") # Return heatmap for peptide residue-number ranges
graphics <- visualise_hdx_data(results, type="protection", level="residue", fasta = "my_fasta_path") # Return heatmap
graphics <- visualise_hdx_data(results, type="protection", level="residue", pdb="my_pdb_path") # Return heatmap projected onto PDB

# GUI: Make GUI by assembling these building blocks

```

